# Import the Data File
library(readr)
Air_Quality_Health_Data <- read_csv("air_quality_health_monthly.csv")
## Rows: 498 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): city, year_month, population_density
## dbl (9): aqi, pm2_5, pm10, no2, o3, temperature, humidity, hospital_admissio...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View(Air_Quality_Health_Data)
MyData <- Air_Quality_Health_Data # Renames the data file
library(fpp2)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.5 ──
## ✔ ggplot2 3.5.1 ✔ fma 2.5
## ✔ forecast 8.24.0 ✔ expsmooth 2.3
## Warning: package 'forecast' was built under R version 4.4.1
##
library(TTR)
attributes(MyData)
## $row.names
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## [19] 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
## [37] 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
## [55] 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
## [73] 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
## [91] 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
## [109] 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
## [127] 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
## [145] 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
## [163] 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## [181] 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
## [199] 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216
## [217] 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
## [235] 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252
## [253] 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270
## [271] 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288
## [289] 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306
## [307] 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324
## [325] 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342
## [343] 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360
## [361] 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378
## [379] 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396
## [397] 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414
## [415] 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432
## [433] 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450
## [451] 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468
## [469] 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486
## [487] 487 488 489 490 491 492 493 494 495 496 497 498
##
## $names
## [1] "city" "year_month" "aqi"
## [4] "pm2_5" "pm10" "no2"
## [7] "o3" "temperature" "humidity"
## [10] "hospital_admissions" "hospital_capacity" "population_density"
##
## $spec
## cols(
## city = col_character(),
## year_month = col_character(),
## aqi = col_double(),
## pm2_5 = col_double(),
## pm10 = col_double(),
## no2 = col_double(),
## o3 = col_double(),
## temperature = col_double(),
## humidity = col_double(),
## hospital_admissions = col_double(),
## hospital_capacity = col_double(),
## population_density = col_character()
## )
##
## $problems
## <pointer: 0x12c7d4950>
##
## $class
## [1] "spec_tbl_df" "tbl_df" "tbl" "data.frame"
plot(MyData)

# Extracts the numerical columns
df_numeric <- MyData[, sapply(MyData, is.numeric)]
# Loop through each numeric column and plot ACF
for (colname in names(df_numeric)){
series <- ts(df_numeric[[colname]])
# Open a new plotting window for each
acf(series, main = paste("ACF for", colname))
}









# Mean Forecast
for (colname in names(df_numeric)) {
series <- ts(df_numeric[[colname]]) # convert to time series object
model_meanf <- meanf(series,12)
acc_meanf <- accuracy(model_meanf)
plot(model_meanf, main = paste("Mean Forecast for", colname))
}









# Naive Forecast
for (colname in names(df_numeric)) {
series <- ts(df_numeric[[colname]]) # convert to time series object
model_naivef <- naive(series,12)
acc_naivef <- accuracy(model_naivef)
plot(model_naivef, main = paste("Naive Forecast for", colname))
}









# Random Walk
for (colname in names(df_numeric)) {
series <- ts(df_numeric[[colname]]) # convert to time series object
model_rwf <- rwf(series,12, drift = TRUE)
acc_rwf <- accuracy(model_rwf)
plot(model_rwf, main = paste("Random Walk Forecast for", colname))
}









# Seasonal Naive
for (colname in names(df_numeric)) {
series <- ts(df_numeric[[colname]]) # convert to time series object
model_snaivef <- snaive(series,12, drift = TRUE)
acc_snaivef <- accuracy(model_snaivef)
plot(model_snaivef, main = paste("Seasonal Naive Forecast for", colname))
}









# Moving Averages
for (colname in names(df_numeric)) {
series <- ts(df_numeric[[colname]]) # convert to time series object
model_MA5 <- ma(series, order = 5)
model_MA5f <- forecast(model_MA5,12)
acc_MA5f <- accuracy(model_MA5f)
plot(model_MA5f, main = paste("Moving Average (with the (Order 5) Forecast for", colname))
}
## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series
## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series

## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series

## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series

## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series

## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series

## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series

## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series

## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series


for (colname in names(df_numeric)) {
series <- ts(df_numeric[[colname]]) # convert to time series object
model_MA9 <- ma(series, order = 9)
model_MA9f <- forecast(model_MA9,12)
acc_MA9f <- accuracy(model_MA9f)
plot(model_MA9f, main = paste("Moving Average (with the (Order 9) Forecast for", colname))
}
## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series
## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series

## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series

## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series

## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series

## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series

## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series

## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series

## Warning in ets(object, lambda = lambda, biasadj = biasadj,
## allow.multiplicative.trend = allow.multiplicative.trend, : Missing values
## encountered. Using longest contiguous portion of time series


# Holt Winters
for (colname in names(df_numeric)) {
series <- ts(df_numeric[[colname]], frequency = 12) # convert to time series object
model_hw <- HoltWinters(series) # Fit Holt-Winters model
model_hwf <- forecast(model_hw,12)
acc_hwf <- accuracy(model_hwf)
plot(model_hwf, main = paste("Holt-Winters Forecast for", colname))
}



## Warning in HoltWinters(series): optimization difficulties: ERROR:
## ABNORMAL_TERMINATION_IN_LNSRCH






# Simple Smoothing
for (colname in names(df_numeric)) {
series <- ts(df_numeric[[colname]], frequency = 12) # convert to time series object
model_ses_hw <- HoltWinters(series, beta = FALSE, gamma = FALSE)
model_ses_hwf <- forecast(model_ses_hw,12)
acc_ses_hwf <- accuracy(model_ses_hwf)
plot(model_ses_hwf, main = paste("Simple Smoothing (Holt-Winters) Forecast for", colname))
}









# Decomposition
for (colname in names(df_numeric)) {
series <- ts(df_numeric[[colname]], frequency = 12) # convert to time series object
model_ets <- ets(series) # Model for the ETS model
model_ets_forecast <- forecast(model_ets,h = 12)
acc_ets_forecast <- accuracy(model_ets_forecast)
plot(model_ets_forecast, main = paste("ETS Model Forecast for", colname))
# Access specific attributes if you wish
print(paste("MSE for", colname, ":", model_ets$mse))
print(attributes(model_ets)) # shows model components
}

## [1] "MSE for aqi : 8528.45773264847"
## $names
## [1] "loglik" "aic" "bic" "aicc" "mse"
## [6] "amse" "fit" "residuals" "fitted" "states"
## [11] "par" "m" "method" "series" "components"
## [16] "call" "initstate" "sigma2" "x"
##
## $class
## [1] "ets"

## [1] "MSE for pm2_5 : 93.5670040960724"
## $names
## [1] "loglik" "aic" "bic" "aicc" "mse"
## [6] "amse" "fit" "residuals" "fitted" "states"
## [11] "par" "m" "method" "series" "components"
## [16] "call" "initstate" "sigma2" "x"
##
## $class
## [1] "ets"

## [1] "MSE for pm10 : 172.484814732366"
## $names
## [1] "loglik" "aic" "bic" "aicc" "mse"
## [6] "amse" "fit" "residuals" "fitted" "states"
## [11] "par" "m" "method" "series" "components"
## [16] "call" "initstate" "sigma2" "x"
##
## $class
## [1] "ets"

## [1] "MSE for no2 : 40.7302124576175"
## $names
## [1] "loglik" "aic" "bic" "aicc" "mse"
## [6] "amse" "fit" "residuals" "fitted" "states"
## [11] "par" "m" "method" "series" "components"
## [16] "call" "initstate" "sigma2" "x"
##
## $class
## [1] "ets"

## [1] "MSE for o3 : 62.8597900178732"
## $names
## [1] "loglik" "aic" "bic" "aicc" "mse"
## [6] "amse" "fit" "residuals" "fitted" "states"
## [11] "par" "m" "method" "series" "components"
## [16] "call" "initstate" "sigma2" "x"
##
## $class
## [1] "ets"

## [1] "MSE for temperature : 69.3211126058046"
## $names
## [1] "loglik" "aic" "bic" "aicc" "mse"
## [6] "amse" "fit" "residuals" "fitted" "states"
## [11] "par" "m" "method" "series" "components"
## [16] "call" "initstate" "sigma2" "x"
##
## $class
## [1] "ets"

## [1] "MSE for humidity : 173.63845479386"
## $names
## [1] "loglik" "aic" "bic" "aicc" "mse"
## [6] "amse" "fit" "residuals" "fitted" "states"
## [11] "par" "m" "method" "series" "components"
## [16] "call" "initstate" "sigma2" "x"
##
## $class
## [1] "ets"

## [1] "MSE for hospital_admissions : 376.279802606846"
## $names
## [1] "loglik" "aic" "bic" "aicc" "mse"
## [6] "amse" "fit" "residuals" "fitted" "states"
## [11] "par" "m" "method" "series" "components"
## [16] "call" "initstate" "sigma2" "x"
##
## $class
## [1] "ets"

## [1] "MSE for hospital_capacity : 221794.400500221"
## $names
## [1] "loglik" "aic" "bic" "aicc" "mse"
## [6] "amse" "fit" "residuals" "fitted" "states"
## [11] "par" "m" "method" "series" "components"
## [16] "call" "initstate" "sigma2" "x"
##
## $class
## [1] "ets"
# Plot time series and different models in one chart
for (colname in names(df_numeric)) {
series <- ts(df_numeric[[colname]], frequency = 12)
# --- Fit all models ---
model_meanf <- meanf(series, 12)
model_naivef <- naive(series, 12)
model_rwf <- rwf(series, 12, drift = TRUE)
model_snaivef <- snaive(series, 12)
model_hw <- HoltWinters(series)
model_hwf <- forecast(model_hw, 12)
model_ses_hw <- HoltWinters(series, beta = FALSE, gamma = FALSE)
model_ses_hwf <- forecast(model_ses_hw, 12)
model_ets <- ets(series)
model_ets_forecast <- forecast(model_ets, 12)
# --- Plot original series first ---
plot(series, main = paste("Forecast Comparison for", colname),
xlim = c(time(series)[1], time(series)[length(series)] + 12/12),
ylim = range(c(series, model_meanf$mean, model_naivef$mean,
model_rwf$mean, model_snaivef$mean,
model_hwf$mean, model_ses_hwf$mean, model_ets_forecast$mean)),
col = "black", lwd = 2, ylab = "Value", xlab = "Time")
# --- Add forecasts from all models ---
lines(model_meanf$mean, col = "blue", lwd = 2, lty = 1)
lines(model_naivef$mean, col = "red", lwd = 2, lty = 2)
lines(model_rwf$mean, col = "green", lwd = 2, lty = 3)
lines(model_snaivef$mean, col = "purple", lwd = 2, lty = 4)
lines(model_hwf$mean, col = "orange", lwd = 2, lty = 5)
lines(model_ses_hwf$mean, col = "brown", lwd = 2, lty = 6)
lines(model_ets_forecast$mean, col = "pink", lwd = 2, lty = 7)
# --- Add legend ---
legend("topleft",
legend = c("Original Series", "Mean", "Naive", "Random Walk", "Seasonal Naive",
"Holt-Winters", "Simple Smoothing", "ETS"),
col = c("black", "blue", "red", "green", "purple", "orange", "brown", "pink"),
lty = c(1,1,2,3,4,5,6,7),
lwd = 2, cex = 0.8)
}



## Warning in HoltWinters(series): optimization difficulties: ERROR:
## ABNORMAL_TERMINATION_IN_LNSRCH






# Accuracy measure used for comparison is RMSE and MAPE
# Out of all the models, the Root Mean Squared Error (RMSE) and Mean Absolute Percentage Error (MAPE) for moving averages model of the order 9 is 73.06942 and 3.920257, respectively. It is the lowest across all the models therefore the moving average with the order 9 provides better accuracy in your forecast for the next 12 months.